#build correlation dataframe for phase1 data
#one row of regression/correlation summary statistics per significant SNP;
#these become the features fed into the naive Bayes classifier below.
#preallocate a list and bind once at the end: the original grew the frame with
#rbind(cbind(...)) in the loop, which is O(n^2) and coerces everything to
#character (forcing the as.numeric(as.character()) cleanup loop afterwards).
cor.frame <- vector("list", length(sig.pos))
for (k in seq_along(sig.pos)) {
  i <- sig.pos[k]
  #allele frequencies for this SNP across the 14 phase-1 populations (cols 3:16)
  frq <- as.numeric(as.vector(phase1.all.freqs.lfmm.hum[phase1.all.freqs.lfmm.hum$id == i, 3:16]))
  mod <- summary(lm(frq ~ sort.pops.phase1$hannual)) #perform linear reg
  #sorted absolute deviations from the mean frequency; the "outlier" feature is
  #the gap between the two most extreme populations. Using length(dev) instead
  #of the hard-coded indices 14/13 generalizes to any population count.
  dev <- sort(abs(frq - mean(frq)))
  n <- length(dev)
  # store 9 values describing the correlation to feed into machine learning
  cor.frame[[k]] <- data.frame(
    id = i,
    maxresid = max(abs(mod$residuals)),
    medresid = median(mod$residuals),
    intercept = mod$coefficients[1, 1],
    stdevresid = mod$sigma,
    rsquared = mod$r.squared,
    f = mod$fstatistic[1],
    cov = cov(frq, sort.pops.phase1$hannual),
    cor = cor(frq, sort.pops.phase1$hannual),
    outlier = dev[n] - dev[n - 1]
  )
}
cor.frame <- do.call(rbind, cor.frame)
#columns are named and numeric by construction; just drop the row names
rownames(cor.frame) <- NULL
#sanity check: id should be non-numeric, the feature columns numeric
class(cor.frame$id)
## [1] "factor"
class(cor.frame$medresid)
## [1] "numeric"
#pairwise correlations among the features (id column excluded)
round(cor(cor.frame[,-1]), 2)
## maxresid medresid intercept stdevresid rsquared f cov cor
## maxresid 1.00 -0.05 -0.03 0.83 -0.69 -0.58 0.11 0.08
## medresid -0.05 1.00 0.24 0.19 0.07 0.07 -0.02 0.04
## intercept -0.03 0.24 1.00 0.15 0.01 0.09 -0.94 -0.88
## stdevresid 0.83 0.19 0.15 1.00 -0.57 -0.47 0.00 -0.03
## rsquared -0.69 0.07 0.01 -0.57 1.00 0.94 -0.06 -0.04
## f -0.58 0.07 0.09 -0.47 0.94 1.00 -0.13 -0.10
## cov 0.11 -0.02 -0.94 0.00 -0.06 -0.13 1.00 0.94
## cor 0.08 0.04 -0.88 -0.03 -0.04 -0.10 0.94 1.00
## outlier 0.60 -0.26 -0.33 0.15 -0.38 -0.35 0.28 0.30
## outlier
## maxresid 0.60
## medresid -0.26
## intercept -0.33
## stdevresid 0.15
## rsquared -0.38
## f -0.35
## cov 0.28
## cor 0.30
## outlier 1.00
#going to drop f and cov because they are highly correlated
#(f with rsquared at 0.94, cov with intercept/cor); drop by name, not position
cor.frame <- cor.frame[, setdiff(names(cor.frame), c("f", "cov"))]
#We are going to drop f and cov due to high correlation w/ other variables.
#pull out our random subset to hand identify as training dataset
#we did this in the R script, and saved these 100 random SNPs to a csv
cor.training <- read.csv(file = "~/Downloads/lfmm.hum.cor.training.csv")[, 2:9]
par(mfrow = c(3, 3))
#plot allele frequency vs. annual humidity for each of the 100 training SNPs
sig.pos <- as.vector(cor.training$id)
for (snp in sig.pos) {
  snp.frq <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp, 3:16]))
  plot(sort.pops.phase1$hannual, snp.frq, main = snp, ylim = c(0, 1))
  abline(lm(snp.frq ~ sort.pops.phase1$hannual))
}
#hand classify this training set
#"environment"=0, "outlier"=1
vec<-c(0,1,0,0,0,1,1,0,1,
0,0,1,1,1,1,0,0,0,
0,1,1,1,0,1,1,0,1,
1,1,1,0,1,1,0,1,0,
1,0,1,0,0,0,0,0,1,
1,1,1,1,1,0,1,1,1,
1,1,1,0,0,1,0,1,1,
0,0,0,0,0,1,0,1,1,
0,0,0,1,0,1,1,0,0,
1,0,0,0,0,1,1,1,1,
0,1,0,1,0,0,0,0,1,
0)
#translate the 0/1 codes to class labels in a single vectorized step
vec <- ifelse(vec == 1, "outlier", "environment")
#add hand classification to the training set
cor.training$class <- as.factor(vec)
#write.csv(cor.training, file="~/Downloads/lfmm.hum.cor.training.csv")
#train the model (not including max residual which decreased prediction accuracy)
train.feats <- cor.training[, 2:8]
m <- naiveBayes(train.feats, cor.training[, 9])
#check predictions against training data (rows = predicted, cols = hand label)
table(predict(m, train.feats), cor.training[, 9])
##
## environment outlier
## environment 42 6
## outlier 7 45
#append the predicted class and the per-class posterior probabilities
cor.training$pred.class <- predict(m, train.feats)
train.post <- predict(m, train.feats, type = "raw")
cor.training$prob.env <- train.post[, 1]
cor.training$prob.out <- train.post[, 2]
#do prediction
#apply the trained naive Bayes model to every SNP in cor.frame
cor.frame$class<-predict(m, cor.frame[,2:8]) #add prediction to df
cor.frame$prob.env<-predict(m, cor.frame[,2:8], type="raw")[,1] #add prediction probability to df
cor.frame$prob.out<-predict(m, cor.frame[,2:8], type="raw")[,2] #add prediction probability to df
#visualize prediction
#PCA of the 7 training features; color points by hand label vs model label
par(mfrow=c(1,2))
pc.df<-as.data.frame(prcomp(cor.training[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=cor.training$class, main = "hand classified")
plot(pc.df$PC1,pc.df$PC2, col=predict(m, cor.training[,2:8]), main = "ML predicted")
#histograms of individual variables by predicted class
#(environment class in translucent blue, outlier class overlaid in translucent pink)
hist(cor.frame$cor[cor.frame$class=="environment"], main="corr",
col = rgb(173,216,230,max = 255, alpha = 80, names = "lt.blue"), xlim=c(0,1), ylim=c(0,1750))
hist(cor.frame$cor[cor.frame$class=="outlier"],
col = rgb(255,192,203, max = 255, alpha = 80, names = "lt.pink"), add =TRUE)
hist(cor.frame$outlier[cor.frame$class=="environment"], main="outlier",
col = rgb(173,216,230,max = 255, alpha = 80, names = "lt.blue"), xlim=c(0,1))
hist(cor.frame$outlier[cor.frame$class=="outlier"],
col = rgb(255,192,203, max = 255, alpha = 80, names = "lt.pink"), add =TRUE)
#pca of all predicted points
#note: pc.df is reused (overwritten) here for the full cor.frame
par(mfrow=c(1,1))
pc.df<-as.data.frame(prcomp(cor.frame[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=cor.frame$class, main = "ML predicted")
#visualize specific training points: correctly classified SNPs in black,
#misclassified ones in red; axis labels carry the posterior probabilities
par(mfrow=c(3,3))
#plot
sig.pos <- as.vector(cor.training$id)
for (j in seq_along(sig.pos)) {
  snp <- sig.pos[j]
  frq <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp, 3:16]))
  if (cor.training$class[j] == cor.training$pred.class[j]) {
    plot(sort.pops.phase1$hannual, frq, main = snp, ylim = c(0, 1),
         xlab = paste("prob env", round(cor.training$prob.env[j], 2)),
         ylab = paste("prob out", round(cor.training$prob.out[j], 2)))
    abline(lm(frq ~ sort.pops.phase1$hannual))
  } else {
    plot(sort.pops.phase1$hannual, frq, main = snp, ylim = c(0, 1), col = "red",
         xlab = paste("prob env", round(cor.training$prob.env[j], 2)),
         ylab = paste("prob out", round(cor.training$prob.out[j], 2)))
    abline(lm(frq ~ sort.pops.phase1$hannual), col = "red")
  }
}
#check the predicted class balance across all SNPs
table(cor.frame$class)
##
## environment outlier
## 5878 5103
par(mfrow = c(1, 1))
hist(cor.frame$prob.env)
#split confidently identified SNPs by class (posterior probability above 0.8)
conf.threshold <- .8
cor.frame.outliers <- cor.frame[cor.frame$prob.out > conf.threshold, ]
cor.frame.env <- cor.frame[cor.frame$prob.env > conf.threshold, ]
#start with environmentally correlated SNPs
#generate matching id's for the outlier dataset in question
sig.pos <- as.vector(cor.frame.env$id)
#subset all AF values to only the outlier dataset, to save time rather than searching the entire 3M snp dataset
phase2.all.freqs.lfmm.hum <- phase2.all.freqs[phase2.all.freqs$id %in% sig.pos, ]
#generate list of SNP IDs found in phase2
sig.pos <- phase2.all.freqs.lfmm.hum$id
#calc phase2 cor.frame
#same per-SNP regression features as phase1 (minus f and cov, already dropped).
#preallocate a list and bind once: the original grew the frame with
#rbind(cbind(...)), which is O(n^2) and coerces everything to character.
cor.frame.env.2 <- vector("list", length(sig.pos))
for (k in seq_along(sig.pos)) {
  i <- sig.pos[k]
  #allele frequencies for this SNP across the 21 phase-2 populations (cols 3:23)
  frq <- as.numeric(as.vector(phase2.all.freqs.lfmm.hum[phase2.all.freqs.lfmm.hum$id == i, 3:23]))
  mod <- summary(lm(frq ~ sort.pops.phase2$hannual)) #perform linear reg
  #gap between the two most extreme deviations from the mean frequency;
  #length(dev) generalizes the hard-coded indices 21/20
  dev <- sort(abs(frq - mean(frq)))
  n <- length(dev)
  cor.frame.env.2[[k]] <- data.frame(
    id = i,
    maxresid = max(abs(mod$residuals)),
    medresid = median(mod$residuals),
    intercept = mod$coefficients[1, 1],
    stdevresid = mod$sigma,
    rsquared = mod$r.squared,
    cor = cor(frq, sort.pops.phase2$hannual),
    outlier = dev[n] - dev[n - 1]
  )
}
cor.frame.env.2 <- do.call(rbind, cor.frame.env.2)
#columns are named and numeric by construction; just drop the row names
rownames(cor.frame.env.2) <- NULL
#merge phase1 and phase2 data, retaining only SNPs called in both phases
match.cor.frame <- merge(cor.frame.env, cor.frame.env.2, by = 'id')
#absolute phase1-vs-phase2 difference for each regression feature
#(names(feature.map) are the output columns, values are the merged-column stems)
feature.map <- c(max.resid = "maxresid", med.resid = "medresid", int = "intercept",
                 stdevresid = "stdevresid", rsquared = "rsquared", cor = "cor",
                 outlier = "outlier")
diff.frame <- data.frame(id = match.cor.frame$id)
for (new.col in names(feature.map)) {
  stem <- feature.map[[new.col]]
  diff.frame[[new.col]] <- abs(match.cor.frame[[paste0(stem, ".x")]] -
                               match.cor.frame[[paste0(stem, ".y")]])
}
#pull out our random subset to hand identify as training dataset
#this step has been done, so we read in csv here
validate.env.training.set <- read.csv(file = "~/Downloads/lfmm.hum.validate.env.csv")[, 2:9]
par(mfrow = c(3, 3))
#plot each training SNP: phase1 points/fit in black, phase2 overlaid in red
sig.pos <- as.vector(validate.env.training.set$id)
for (snp in sig.pos) {
  frq1 <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp, 3:16]))
  frq2 <- as.numeric(as.vector(phase2.all.freqs[phase2.all.freqs$id == snp, 3:23]))
  plot(sort.pops.phase1$hannual, frq1, main = snp, ylim = c(0, 1))
  abline(lm(frq1 ~ sort.pops.phase1$hannual))
  points(sort.pops.phase2$hannual, frq2, col = "red")
  abline(lm(frq2 ~ sort.pops.phase2$hannual), col = "red")
}
#"accept"=1,"reject"=0
vec<-c(1,1,1,1,1,1,1,0,1,
0,0,0,1,0,1,1,0,0,
0,0,1,0,1,0,0,1,1,
0,1,0,0,0,0,0,0,0,
0,1,0,1,0,1,1,1,1,
0,1,0,1,0,1,0,1,1,
1,1,1,1,1,1,1,1,1,
0,0,1,1,0,0,1,0,0,
0,1,1,1,0,1,0,0,0,
0,0,1,1,1,0,0,1,0,
1,0,1,1,1,1,1,0,1,
0)
#translate the 0/1 codes to class labels in a single vectorized step
vec <- ifelse(vec == 1, "accept", "reject")
#add hand classification to the training set
validate.env.training.set$class <- as.factor(vec)
#write.csv(validate.env.training.set, file="~/Downloads/lfmm.hum.validate.env.csv")
#train the model (not including max residual which decreased prediction accuracy)
val.feats <- validate.env.training.set[, 2:8]
m <- naiveBayes(val.feats, validate.env.training.set[, 9])
#check predictions against training data (rows = predicted, cols = hand label)
table(predict(m, val.feats), validate.env.training.set[, 9])
##
## accept reject
## accept 46 12
## reject 8 34
#append the predicted class and accept posterior to the training df
validate.env.training.set$pred.class <- predict(m, val.feats)
validate.env.training.set$prob.accept <- predict(m, val.feats, type = "raw")[, 1]
#do prediction for every SNP present in both phases
diff.frame$class <- predict(m, diff.frame[, 2:8])
diff.frame$prob.accept <- predict(m, diff.frame[, 2:8], type = "raw")[, 1]
#visualize prediction
#PCA of the 7 difference features; color by hand label vs model label
par(mfrow=c(1,2))
pc.df<-as.data.frame(prcomp(validate.env.training.set[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=validate.env.training.set$class, main = "hand classified")
plot(pc.df$PC1,pc.df$PC2, col=predict(m, validate.env.training.set[,2:8]), main = "ML predicted")
#check prediction class balance over all matched SNPs
table(diff.frame$class)
##
## accept reject
## 2437 2139
par(mfrow=c(1,1))
hist(diff.frame$prob.accept)
#keep only SNPs confidently accepted by the model (accept posterior above 0.8)
vetted.lfmm.hum.env <- diff.frame[diff.frame$prob.accept > .8, ]
#add order column from allsigs to visualize this on a manhattan plot
env.ids <- paste(all.sigs$chrom, all.sigs$pos)
all.sigs.env <- all.sigs[env.ids %in% vetted.lfmm.hum.env$id, ]
all.sigs.env <- data.frame(order = all.sigs.env$order,
                           id = paste(all.sigs.env$chrom, all.sigs.env$pos))
vetted.lfmm.hum.env <- merge(all.sigs.env, vetted.lfmm.hum.env, by = "id")
#write.csv(vetted.lfmm.hum.env[,1:2], file = "~/Downloads/vetted.lfmm.hum.env.csv")
#####
####
###
##
#now outliers
#generate matching id's for the outlier dataset in question
sig.pos <- as.vector(cor.frame.outliers$id)
#subset all AF values to only the outlier dataset, to save time rather than searching the entire 3M snp dataset
phase2.all.freqs.lfmm.hum <- phase2.all.freqs[phase2.all.freqs$id %in% sig.pos, ]
#generate list of SNP IDs found in phase2
sig.pos <- phase2.all.freqs.lfmm.hum$id
#calc phase2 cor.frame
#preallocate a list and bind once: the original grew the frame with
#rbind(cbind(...)), which is O(n^2) and coerces everything to character.
cor.frame.outliers.2 <- vector("list", length(sig.pos))
for (k in seq_along(sig.pos)) {
  i <- sig.pos[k]
  #allele frequencies for this SNP across the 21 phase-2 populations (cols 3:23)
  frq <- as.numeric(as.vector(phase2.all.freqs.lfmm.hum[phase2.all.freqs.lfmm.hum$id == i, 3:23]))
  mod <- summary(lm(frq ~ sort.pops.phase2$hannual)) #perform linear reg
  #gap between the two most extreme deviations from the mean frequency;
  #length(dev) generalizes the hard-coded indices 21/20
  dev <- sort(abs(frq - mean(frq)))
  n <- length(dev)
  cor.frame.outliers.2[[k]] <- data.frame(
    id = i,
    maxresid = max(abs(mod$residuals)),
    medresid = median(mod$residuals),
    intercept = mod$coefficients[1, 1],
    stdevresid = mod$sigma,
    rsquared = mod$r.squared,
    cor = cor(frq, sort.pops.phase2$hannual),
    outlier = dev[n] - dev[n - 1]
  )
}
cor.frame.outliers.2 <- do.call(rbind, cor.frame.outliers.2)
#columns are named and numeric by construction; just drop the row names
rownames(cor.frame.outliers.2) <- NULL
#merge phase1 and phase2 data, retaining only SNPs called in both phases
match.cor.frame <- merge(cor.frame.outliers, cor.frame.outliers.2, by = 'id')
#absolute phase1-vs-phase2 difference for each regression feature
#(names(feature.map) are the output columns, values are the merged-column stems)
feature.map <- c(max.resid = "maxresid", med.resid = "medresid", int = "intercept",
                 stdevresid = "stdevresid", rsquared = "rsquared", cor = "cor",
                 outlier = "outlier")
diff.frame <- data.frame(id = match.cor.frame$id)
for (new.col in names(feature.map)) {
  stem <- feature.map[[new.col]]
  diff.frame[[new.col]] <- abs(match.cor.frame[[paste0(stem, ".x")]] -
                               match.cor.frame[[paste0(stem, ".y")]])
}
#pull out our random subset to hand identify as training dataset
#here we pull in the same SNPs randomly selected in the R script
validate.outliers.training.set <- read.csv(file = "~/Downloads/lfmm.hum.validate.outliers.csv")[, 2:9]
par(mfrow = c(3, 3))
#plot each training SNP: phase1 points/fit in black, phase2 overlaid in red
sig.pos <- as.vector(validate.outliers.training.set$id)
for (snp in sig.pos) {
  frq1 <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp, 3:16]))
  frq2 <- as.numeric(as.vector(phase2.all.freqs[phase2.all.freqs$id == snp, 3:23]))
  plot(sort.pops.phase1$hannual, frq1, main = snp, ylim = c(0, 1))
  abline(lm(frq1 ~ sort.pops.phase1$hannual))
  points(sort.pops.phase2$hannual, frq2, col = "red")
  abline(lm(frq2 ~ sort.pops.phase2$hannual), col = "red")
}
#"accept"=1,"reject"=0
vec<-c(0,0,0,0,1,1,1,0,0,
1,0,1,1,0,0,1,1,1,
1,0,0,1,1,0,0,0,1,
1,1,1,0,0,0,1,1,1,
0,0,0,1,1,1,1,0,0,
1,0,0,0,0,1,0,0,0,
0,1,1,0,1,1,0,0,0,
1,0,0,1,0,0,0,1,0,
1,1,1,0,0,1,1,0,1,
1,0,0,1,1,0,0,1,1,
1,0,0,0,0,0,1,1,0,
1)
#translate the 0/1 codes to class labels in a single vectorized step
vec <- ifelse(vec == 1, "accept", "reject")
#add hand classification to the training set
validate.outliers.training.set$class <- as.factor(vec)
#write.csv(validate.outliers.training.set, file="~/Downloads/lfmm.hum.validate.outliers.csv")
#train the model (not including max residual which decreased prediction accuracy)
out.feats <- validate.outliers.training.set[, 2:8]
m <- naiveBayes(out.feats, validate.outliers.training.set[, 9])
#check predictions against training data (rows = predicted, cols = hand label)
table(predict(m, out.feats), validate.outliers.training.set[, 9])
##
## accept reject
## accept 44 13
## reject 3 40
#append the predicted class and accept posterior to the training df
validate.outliers.training.set$pred.class <- predict(m, out.feats)
validate.outliers.training.set$prob.accept <- predict(m, out.feats, type = "raw")[, 1]
#do prediction for every SNP present in both phases
diff.frame$class <- predict(m, diff.frame[, 2:8])
diff.frame$prob.accept <- predict(m, diff.frame[, 2:8], type = "raw")[, 1]
#visualize prediction
#PCA of the 7 difference features; color by hand label vs model label
par(mfrow=c(1,2))
pc.df<-as.data.frame(prcomp(validate.outliers.training.set[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=validate.outliers.training.set$class, main = "hand classified")
plot(pc.df$PC1,pc.df$PC2, col=predict(m, validate.outliers.training.set[,2:8]), main = "ML predicted")
#check prediction class balance over all matched SNPs
table(diff.frame$class)
##
## accept reject
## 1796 1690
par(mfrow=c(1,1))
hist(diff.frame$prob.accept)
#split confidently identified SNPs by class (accept posterior above 0.8)
vetted.lfmm.hum.outliers <- diff.frame[diff.frame$prob.accept > .8, ]
#add order column from allsigs to visualize this on a manhattan plot
all.sigs.out <- all.sigs[paste(all.sigs$chrom, all.sigs$pos) %in% vetted.lfmm.hum.outliers$id, ]
all.sigs.out <- data.frame(order = all.sigs.out$order,
                           id = paste(all.sigs.out$chrom, all.sigs.out$pos))
vetted.lfmm.hum.outliers <- merge(all.sigs.out, vetted.lfmm.hum.outliers, by = "id")
#pull allele freqs for outliers
phase1.all.freqs.vetted.outliers <- phase1.all.freqs[phase1.all.freqs$id %in% vetted.lfmm.hum.outliers$id, ]
#identify the outlier pop: the population column flagged by outliers::outlier()
#preallocate instead of growing with c(); use seq_len to be safe on 0 rows;
#hoist the invariant column-name lookup out of the loop
#NOTE(review): outlier(..., logical = TRUE) is assumed to flag exactly one
#population per SNP — verify ties cannot produce multiple flags
n.vetted <- nrow(phase1.all.freqs.vetted.outliers)
pop.names <- colnames(phase1.all.freqs.vetted.outliers)[3:16]
outlier.pop <- character(n.vetted)
for (i in seq_len(n.vetted)) {
  flagged <- outlier(as.numeric(phase1.all.freqs.vetted.outliers[i, 3:16]), logical = TRUE)
  outlier.pop[i] <- pop.names[flagged]
}
#add to df
vetted.lfmm.hum.outliers$outlier.pop <- outlier.pop
#summary counts at each filtering stage
#NOTE(review): lfmm.hum.outliers is not defined in this chunk — presumably the
#initial LFMM humidity outlier set created earlier in the script; verify upstream
nrow(lfmm.hum.outliers)
## [1] 10981
nrow(cor.frame.outliers)
## [1] 3920
nrow(cor.frame.env)
## [1] 4989
nrow(vetted.lfmm.hum.env)
## [1] 2088
#how many vetted outlier SNPs implicate each population
#(names are the phase1 frequency column names, e.g. X.3.635)
table(vetted.lfmm.hum.outliers$outlier.pop)
##
## X.3.635 X.3.862 X.8.821 X0.384 X0.77 X11.233 X11.891
## 14 4 661 712 17 2 95
#write.csv(vetted.lfmm.hum.outliers[,c(1:2,12)], file = "~/Downloads/vetted.lfmm.hum.outliers.csv")
#population key: 3.635 = Kenya Mbogolo (KE); 3.862 = Kenya Junju (KE); 8.821 = Angola Luanda (AO); 0.384 = Gabon Libreville (GA); 0.77 = Uganda Tororo (UG); 11.233 = Burkina Faso Bana (BF); 11.891 = Guinea Bissau Antula (GW)
#bring in selective sweep data for select populations
#each population has an hstats csv in ~/Downloads named <CODE>.hstats.csv
read.hstats <- function(code) read.csv(file = paste0("~/Downloads/", code, ".hstats.csv"))
KE <- read.hstats("KE")
AO <- read.hstats("AO")
GA <- read.hstats("GA")
UG <- read.hstats("UG")
BFM <- read.hstats("BFM")
BFS <- read.hstats("BFS")
GW <- read.hstats("GW")
#bring in all significant outliers
all.sigs <- read.csv(file = "~/Desktop/anoph.phase2/all.sigs.fdr.adj.csv")
#bring in vetted outlier datasets
lfmm.hum.single.pop.outliers <- read.csv(file = "~/Downloads/vetted.lfmm.hum.outliers.csv")
lfmm.hum.env.assoc.outliers <- read.csv(file = "~/Downloads/vetted.lfmm.hum.env.csv")
#build a figure that is:
#line1: manhattan plot lfmm hum pvals, with red lines indicating vetted env. assoc. SNPs
#line2-5: genome plots of selective sweep metric for a single pop, with red lines indicating local outliers
#LFMM hum plot -log10(signficance vals) as manhattan plots
env<-ggplot(data=all.sigs, aes(x=order, y=-log10(hum.p), col = chrom))+
  geom_scattermore()+
  scale_color_manual(values=c("black","gray","black","gray","gray"))+
  theme(panel.grid=element_blank(), panel.background=element_blank())+
  geom_vline(xintercept=c(lfmm.hum.env.assoc.outliers$order),color='gray', alpha=.35)
#helper: genome-wide H12 sweep plot for one population, marking that population's
#vetted single-population outlier SNPs with gray vertical lines.
#this replaces six copy-pasted ggplot blocks that differed only in the data
#frame and the outlier.pop filter.
sweep.plot <- function(hstats.df, pop.codes) {
  hits <- lfmm.hum.single.pop.outliers$order[lfmm.hum.single.pop.outliers$outlier.pop %in% pop.codes]
  ggplot(data = hstats.df, aes(x = order, y = h12)) +
    ylim(c(0, 1)) +
    theme(panel.grid = element_blank(), panel.background = element_blank()) +
    geom_vline(xintercept = c(hits), color = "gray", alpha = .35) +
    geom_line()
}
#NOTE(review): the outlier.pop table earlier in this script prints dotted names
#(X.3.635, X.3.862, X.8.821) for negative coordinates, so the original filters
#"X3.635"/"X3.862"/"X8.821" could never match — both spellings are accepted
#here; confirm which form the vetted csv actually holds
GA.plot <- sweep.plot(GA, "X0.384")
AO.plot <- sweep.plot(AO, c("X8.821", "X.8.821"))
GW.plot <- sweep.plot(GW, "X11.891")
UG.plot <- sweep.plot(UG, "X0.77")
KE.plot <- sweep.plot(KE, c("X3.635", "X.3.635", "X3.862", "X.3.862"))
BFS.plot <- sweep.plot(BFS, "X11.233")
grid.arrange(env,GA.plot,AO.plot,GW.plot,UG.plot,KE.plot,BFS.plot, nrow = 7)